{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:13.276528Z",
     "start_time": "2025-02-11T11:37:13.272516Z"
    }
   },
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ],
   "outputs": [],
   "execution_count": 32
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:13.431894Z",
     "start_time": "2025-02-11T11:37:13.287982Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 用户行为数据\n",
    "train=pd.read_csv('tianchi_fresh_comp_train_item.csv')\n",
    "train"
   ],
   "id": "206249c6567430c9",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          item_id item_geohash  item_category\n",
       "0       100002303          NaN           3368\n",
       "1       100003592          NaN           7995\n",
       "2       100006838          NaN          12630\n",
       "3       100008089          NaN           7791\n",
       "4       100012750          NaN           9614\n",
       "...           ...          ...            ...\n",
       "620913   99994679          NaN           9205\n",
       "620914   99995241          NaN            597\n",
       "620915   99998434          NaN           8099\n",
       "620916   99998861          NaN          12553\n",
       "620917   99999855          NaN           3900\n",
       "\n",
       "[620918 rows x 3 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_geohash</th>\n",
       "      <th>item_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002303</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003592</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100006838</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100008089</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100012750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620913</th>\n",
       "      <td>99994679</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620914</th>\n",
       "      <td>99995241</td>\n",
       "      <td>NaN</td>\n",
       "      <td>597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620915</th>\n",
       "      <td>99998434</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8099</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620916</th>\n",
       "      <td>99998861</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>620917</th>\n",
       "      <td>99999855</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>620918 rows × 3 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 33
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:13.451723Z",
     "start_time": "2025-02-11T11:37:13.433131Z"
    }
   },
   "cell_type": "code",
   "source": "train.isnull().sum()",
   "id": "240173aded5a530f",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "item_id               0\n",
       "item_geohash     417508\n",
       "item_category         0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 34
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:21.967579Z",
     "start_time": "2025-02-11T11:37:13.452728Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 商品信息数据\n",
    "user=pd.read_csv('tianchi_fresh_comp_train_user.csv')\n",
    "user"
   ],
   "id": "418a1c1b1104f10b",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           user_id    item_id  behavior_type user_geohash  item_category  \\\n",
       "0         10001082  285259775              1      97lk14c           4076   \n",
       "1         10001082    4368907              1          NaN           5503   \n",
       "2         10001082    4368907              1          NaN           5503   \n",
       "3         10001082   53616768              1          NaN           9762   \n",
       "4         10001082  151466952              1          NaN           5232   \n",
       "...            ...        ...            ...          ...            ...   \n",
       "23291022  65341491  259008790              1          NaN          13164   \n",
       "23291023  65341491  336404938              1          NaN          13164   \n",
       "23291024  65341491   52142024              1      95qhbsu           5201   \n",
       "23291025  65341491  250557965              1          NaN          13164   \n",
       "23291026  65341491  300315408              1          NaN           1838   \n",
       "\n",
       "                   time  \n",
       "0         2014-12-08 18  \n",
       "1         2014-12-12 12  \n",
       "2         2014-12-12 12  \n",
       "3         2014-12-02 15  \n",
       "4         2014-12-12 11  \n",
       "...                 ...  \n",
       "23291022  2014-12-03 12  \n",
       "23291023  2014-12-03 12  \n",
       "23291024  2014-12-10 22  \n",
       "23291025  2014-12-03 12  \n",
       "23291026  2014-11-29 08  \n",
       "\n",
       "[23291027 rows x 6 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>behavior_type</th>\n",
       "      <th>user_geohash</th>\n",
       "      <th>item_category</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001082</td>\n",
       "      <td>285259775</td>\n",
       "      <td>1</td>\n",
       "      <td>97lk14c</td>\n",
       "      <td>4076</td>\n",
       "      <td>2014-12-08 18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10001082</td>\n",
       "      <td>4368907</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5503</td>\n",
       "      <td>2014-12-12 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001082</td>\n",
       "      <td>4368907</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5503</td>\n",
       "      <td>2014-12-12 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001082</td>\n",
       "      <td>53616768</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9762</td>\n",
       "      <td>2014-12-02 15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001082</td>\n",
       "      <td>151466952</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5232</td>\n",
       "      <td>2014-12-12 11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23291022</th>\n",
       "      <td>65341491</td>\n",
       "      <td>259008790</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13164</td>\n",
       "      <td>2014-12-03 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23291023</th>\n",
       "      <td>65341491</td>\n",
       "      <td>336404938</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13164</td>\n",
       "      <td>2014-12-03 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23291024</th>\n",
       "      <td>65341491</td>\n",
       "      <td>52142024</td>\n",
       "      <td>1</td>\n",
       "      <td>95qhbsu</td>\n",
       "      <td>5201</td>\n",
       "      <td>2014-12-10 22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23291025</th>\n",
       "      <td>65341491</td>\n",
       "      <td>250557965</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13164</td>\n",
       "      <td>2014-12-03 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23291026</th>\n",
       "      <td>65341491</td>\n",
       "      <td>300315408</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1838</td>\n",
       "      <td>2014-11-29 08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>23291027 rows × 6 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 35
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:23.082158Z",
     "start_time": "2025-02-11T11:37:21.968767Z"
    }
   },
   "cell_type": "code",
   "source": "user.isnull().sum()",
   "id": "da21a19ea60d6ad9",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "user_id                 0\n",
       "item_id                 0\n",
       "behavior_type           0\n",
       "user_geohash     15911010\n",
       "item_category           0\n",
       "time                    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 36
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:24.767082Z",
     "start_time": "2025-02-11T11:37:23.084199Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 填充确实值\n",
    "user['user_geohash'] = user['user_geohash'].fillna(user['user_geohash'].mode()[0])\n",
    "train['item_geohash'] = train['item_geohash'].fillna(train['item_geohash'].mode()[0])\n"
   ],
   "id": "1ae63bef36df967d",
   "outputs": [],
   "execution_count": 37
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:33.397057Z",
     "start_time": "2025-02-11T11:37:24.768367Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#去除重复值\n",
    "user=user.drop_duplicates()\n",
    "train=train.drop_duplicates()"
   ],
   "id": "c55a26f1099afedf",
   "outputs": [],
   "execution_count": 38
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:39.935864Z",
     "start_time": "2025-02-11T11:37:33.397057Z"
    }
   },
   "cell_type": "code",
   "source": [
    "user_item_count = user.groupby(['user_id', 'item_id', 'behavior_type']).size().unstack(fill_value=0)\n",
    "user_item_count.columns = ['browse_count', 'collect_count', 'cart_count', 'buy_count']\n",
    "user_item_count"
   ],
   "id": "f7fc8d4093332cfc",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                     browse_count  collect_count  cart_count  buy_count\n",
       "user_id   item_id                                                      \n",
       "492       254885                2              0           1          1\n",
       "          2316002               3              0           0          0\n",
       "          3473697               2              0           0          0\n",
       "          6983065               2              0           0          0\n",
       "          8977055               2              0           0          0\n",
       "...                           ...            ...         ...        ...\n",
       "142442955 391981437             2              0           0          0\n",
       "          392122509             5              0           0          0\n",
       "          392304614             1              0           0          0\n",
       "          397280442             1              0           0          0\n",
       "          403495818             1              0           0          0\n",
       "\n",
       "[8858710 rows x 4 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>browse_count</th>\n",
       "      <th>collect_count</th>\n",
       "      <th>cart_count</th>\n",
       "      <th>buy_count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">492</th>\n",
       "      <th>254885</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2316002</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3473697</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6983065</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8977055</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">142442955</th>\n",
       "      <th>391981437</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>392122509</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>392304614</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397280442</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>403495818</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8858710 rows × 4 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 39
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:42.241888Z",
     "start_time": "2025-02-11T11:37:39.937122Z"
    }
   },
   "cell_type": "code",
   "source": [
    "user['time'] = pd.to_datetime(user['time'])\n",
    "user['hour'] = user['time'].dt.hour\n",
    "user['day_of_week'] = user['time'].dt.dayofweek"
   ],
   "id": "51669f1ea9ebcba6",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\大白的泰坦\\AppData\\Local\\Temp\\ipykernel_19580\\1075629923.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  user['time'] = pd.to_datetime(user['time'])\n",
      "C:\\Users\\大白的泰坦\\AppData\\Local\\Temp\\ipykernel_19580\\1075629923.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  user['hour'] = user['time'].dt.hour\n",
      "C:\\Users\\大白的泰坦\\AppData\\Local\\Temp\\ipykernel_19580\\1075629923.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  user['day_of_week'] = user['time'].dt.dayofweek\n"
     ]
    }
   ],
   "execution_count": 40
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:37:44.678313Z",
     "start_time": "2025-02-11T11:37:42.241888Z"
    }
   },
   "cell_type": "code",
   "source": [
    "merged_data = pd.merge(user_item_count.reset_index(), train, on='item_id', how='left')\n",
    "merged_data"
   ],
   "id": "bcfaca54ad28559f",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           user_id    item_id  browse_count  collect_count  cart_count  \\\n",
       "0              492     254885             2              0           1   \n",
       "1              492    2316002             3              0           0   \n",
       "2              492    3473697             2              0           0   \n",
       "3              492    6983065             2              0           0   \n",
       "4              492    8977055             2              0           0   \n",
       "...            ...        ...           ...            ...         ...   \n",
       "9617803  142442955  391981437             2              0           0   \n",
       "9617804  142442955  392122509             5              0           0   \n",
       "9617805  142442955  392304614             1              0           0   \n",
       "9617806  142442955  397280442             1              0           0   \n",
       "9617807  142442955  403495818             1              0           0   \n",
       "\n",
       "         buy_count item_geohash  item_category  \n",
       "0                1          NaN            NaN  \n",
       "1                0      956mo92         6247.0  \n",
       "2                0          NaN            NaN  \n",
       "3                0          NaN            NaN  \n",
       "4                0          NaN            NaN  \n",
       "...            ...          ...            ...  \n",
       "9617803          0          NaN            NaN  \n",
       "9617804          0          NaN            NaN  \n",
       "9617805          0          NaN            NaN  \n",
       "9617806          0          NaN            NaN  \n",
       "9617807          0          NaN            NaN  \n",
       "\n",
       "[9617808 rows x 8 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>browse_count</th>\n",
       "      <th>collect_count</th>\n",
       "      <th>cart_count</th>\n",
       "      <th>buy_count</th>\n",
       "      <th>item_geohash</th>\n",
       "      <th>item_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>492</td>\n",
       "      <td>254885</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>492</td>\n",
       "      <td>2316002</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>6247.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>492</td>\n",
       "      <td>3473697</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>492</td>\n",
       "      <td>6983065</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>492</td>\n",
       "      <td>8977055</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617803</th>\n",
       "      <td>142442955</td>\n",
       "      <td>391981437</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617804</th>\n",
       "      <td>142442955</td>\n",
       "      <td>392122509</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617805</th>\n",
       "      <td>142442955</td>\n",
       "      <td>392304614</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617806</th>\n",
       "      <td>142442955</td>\n",
       "      <td>397280442</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617807</th>\n",
       "      <td>142442955</td>\n",
       "      <td>403495818</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9617808 rows × 8 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 41
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:39:47.419959Z",
     "start_time": "2025-02-11T11:39:46.786512Z"
    }
   },
   "cell_type": "code",
   "source": [
    "merged_data['item_category']=merged_data['item_category'].fillna(merged_data['item_category'].mode()[0])\n",
    "merged_data['item_geohash']=merged_data['item_geohash'].fillna(merged_data['item_geohash'].mode()[0])\n",
    "merged_data"
   ],
   "id": "81e66a5ce30a730e",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           user_id    item_id  browse_count  collect_count  cart_count  \\\n",
       "0              492     254885             2              0           1   \n",
       "1              492    2316002             3              0           0   \n",
       "2              492    3473697             2              0           0   \n",
       "3              492    6983065             2              0           0   \n",
       "4              492    8977055             2              0           0   \n",
       "...            ...        ...           ...            ...         ...   \n",
       "9617803  142442955  391981437             2              0           0   \n",
       "9617804  142442955  392122509             5              0           0   \n",
       "9617805  142442955  392304614             1              0           0   \n",
       "9617806  142442955  397280442             1              0           0   \n",
       "9617807  142442955  403495818             1              0           0   \n",
       "\n",
       "         buy_count item_geohash  item_category  \n",
       "0                1      956mo92        12090.0  \n",
       "1                0      956mo92         6247.0  \n",
       "2                0      956mo92        12090.0  \n",
       "3                0      956mo92        12090.0  \n",
       "4                0      956mo92        12090.0  \n",
       "...            ...          ...            ...  \n",
       "9617803          0      956mo92        12090.0  \n",
       "9617804          0      956mo92        12090.0  \n",
       "9617805          0      956mo92        12090.0  \n",
       "9617806          0      956mo92        12090.0  \n",
       "9617807          0      956mo92        12090.0  \n",
       "\n",
       "[9617808 rows x 8 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>browse_count</th>\n",
       "      <th>collect_count</th>\n",
       "      <th>cart_count</th>\n",
       "      <th>buy_count</th>\n",
       "      <th>item_geohash</th>\n",
       "      <th>item_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>492</td>\n",
       "      <td>254885</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>492</td>\n",
       "      <td>2316002</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>6247.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>492</td>\n",
       "      <td>3473697</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>492</td>\n",
       "      <td>6983065</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>492</td>\n",
       "      <td>8977055</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617803</th>\n",
       "      <td>142442955</td>\n",
       "      <td>391981437</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617804</th>\n",
       "      <td>142442955</td>\n",
       "      <td>392122509</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617805</th>\n",
       "      <td>142442955</td>\n",
       "      <td>392304614</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617806</th>\n",
       "      <td>142442955</td>\n",
       "      <td>397280442</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617807</th>\n",
       "      <td>142442955</td>\n",
       "      <td>403495818</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>956mo92</td>\n",
       "      <td>12090.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9617808 rows × 8 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 43
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:40:29.935017Z",
     "start_time": "2025-02-11T11:40:04.521501Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import precision_score, recall_score, f1_score\n",
    "# 准备特征和标签\n",
    "X = merged_data.drop(['user_id', 'item_id', 'buy_count','item_geohash'], axis=1)\n",
    "y = (merged_data['buy_count'] > 0).astype(int)\n",
    "# 划分训练集和测试集\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "# 模型训练\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)"
   ],
   "id": "37ef57955edd4d88",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression()"
      ],
      "text/html": [
       "<style>#sk-container-id-2 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-2 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-2 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-2 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-2 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-2 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 45
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:40:34.542553Z",
     "start_time": "2025-02-11T11:40:33.466105Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 模型预测\n",
    "y_pred = model.predict(X_test)\n",
    "# 计算评估指标\n",
    "precision = precision_score(y_test, y_pred)\n",
    "recall = recall_score(y_test, y_pred)\n",
    "f1 = f1_score(y_test, y_pred)\n",
    "print(f\"Precision: {precision}\")\n",
    "print(f\"Recall: {recall}\")\n",
    "print(f\"F1-score: {f1}\")"
   ],
   "id": "f387dbf24542878d",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision: 0.5284126840726353\n",
      "Recall: 0.13526827012025902\n",
      "F1-score: 0.2153970796737189\n"
     ]
    }
   ],
   "execution_count": 46
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-11T11:42:07.394828Z",
     "start_time": "2025-02-11T11:40:47.025656Z"
    }
   },
   "cell_type": "code",
   "source": [
    "best_f1 = 0\n",
    "best_model = None\n",
    "C_values = [0.1, 1, 10]\n",
    "for C in C_values:\n",
    "    model = LogisticRegression(C=C)\n",
    "    model.fit(X_train, y_train)\n",
    "    y_pred = model.predict(X_test)\n",
    "    f1 = f1_score(y_test, y_pred)\n",
    "    if f1 > best_f1:\n",
    "        best_f1 = f1\n",
    "        best_model = model\n",
    "print(f\"Best F1-score after optimization: {best_f1}\")"
   ],
   "id": "d3bd35d4bacee3c6",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  n_iter_i = _check_optimize_result(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best F1-score after optimization: 0.2153970796737189\n"
     ]
    }
   ],
   "execution_count": 47
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
